{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 4 Pandas的索引操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:16:36.632449500Z",
     "start_time": "2024-07-05T08:16:35.944482200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index([0, 1, 2, 3], dtype='int64')\n"
     ]
    }
   ],
   "source": [
    "\n",
    "dict_data = {'A': 1,\n",
    "             'B': pd.Timestamp('20190926'),\n",
    "             'C': pd.Series(1, index=list(range(4)),dtype='float32'),\n",
    "             'D': np.array([1,2,3,4],dtype='int32'),\n",
    "             'E': [\"Python\",\"Java\",\"C++\",\"C\"],\n",
    "             'F': 'wangdao' }\n",
    "df_obj2 = pd.DataFrame(dict_data)\n",
    "print(df_obj2.index)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:16:41.253579800Z",
     "start_time": "2024-07-05T08:16:41.244861900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [],
   "source": [
    "# 索引对象的值不可变（上面代码增加）\n",
    "# df_obj2.index[0] = 2"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "3 常见的Index种类\n",
    "•Index，索引  可以是各种类型\n",
    "•Int64Index，整数索引\n",
    "•MultiIndex，层级索引，难度较大\n",
    "•DatetimeIndex，时间戳类型"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": "Index(['a', 'b', 'c', 'd', 'e'], dtype='object')"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser_obj = pd.Series(range(5), index = list(\"abcde\"))\n",
    "print(ser_obj)\n",
    "ser_obj.index"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:17:40.422816600Z",
     "start_time": "2024-07-05T08:17:40.395631700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_9256\\2604506650.py:3: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(ser_obj[2]) #索引位置\n"
     ]
    }
   ],
   "source": [
    "# 行索引，不仅可以用索引名，可以用索引位置或来取\n",
    "print(ser_obj['b']) #索引名\n",
    "print(ser_obj[2]) #索引位置"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:17:59.048133900Z",
     "start_time": "2024-07-05T08:17:59.034595600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n"
     ]
    }
   ],
   "source": [
    "print(ser_obj.loc['b']) #索引名\n",
    "print(ser_obj.iloc[2]) #索引位置"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:18:34.437132100Z",
     "start_time": "2024-07-05T08:18:34.427136800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 切片索引\n",
    "print(ser_obj.iloc[1:3])  #索引位置取数据，左闭右开,ndarray一致\n",
    "print(ser_obj.loc['b':'d'])  #记住索引名  左闭右闭"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:20:01.240604100Z",
     "start_time": "2024-07-05T08:20:01.228844200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "c    2\n",
      "e    4\n",
      "dtype: int64\n",
      "a    0\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 不连续索引\n",
    "print(ser_obj.iloc[[0, 2, 4]])\n",
    "print(ser_obj.loc[['a', 'e']])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:21:17.477494500Z",
     "start_time": "2024-07-05T08:21:17.467934600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "a    False\n",
      "b    False\n",
      "c    False\n",
      "d     True\n",
      "e     True\n",
      "dtype: bool\n",
      "--------------------------------------------------\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 布尔索引\n",
    "ser_bool = ser_obj > 2\n",
    "print(ser_obj)\n",
    "print(ser_bool)\n",
    "print('-'*50)\n",
    "print(ser_obj[ser_bool])\n",
    "\n",
    "print(ser_obj[ser_obj > 2]) #取出大于2的元素"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:23:00.652693200Z",
     "start_time": "2024-07-05T08:23:00.646003800Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 4.4 DataFrame索引"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "0  0.537759  0.213799  1.367047 -0.194929\n",
      "1 -1.191574  0.025920 -0.884330 -0.427654\n",
      "2  1.189818 -0.405114  0.754134 -1.768986\n",
      "3 -0.375252  0.383803  0.911833  0.423838\n",
      "4  1.002363  0.668004 -0.055305  1.331479\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "df_obj = pd.DataFrame(np.random.randn(5,4),\n",
    "                      columns = ['a', 'b', 'c', 'd'])\n",
    "print(df_obj.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:23:45.927005100Z",
     "start_time": "2024-07-05T08:23:45.906083300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.537759\n",
      "1   -1.191574\n",
      "2    1.189818\n",
      "3   -0.375252\n",
      "4    1.002363\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n",
      "          a\n",
      "0  0.537759\n",
      "1 -1.191574\n",
      "2  1.189818\n",
      "3 -0.375252\n",
      "4  1.002363\n",
      "--------------------------------------------------\n",
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "source": [
    "# 列索引\n",
    "print(df_obj.loc[:,'a']) # 返回Series类型\n",
    "print('-'*50)\n",
    "print(df_obj.loc[:,['a']]) # 返回DataFrame类型\n",
    "print('-'*50)\n",
    "print(type(df_obj.loc[:,['a']])) # 返回DataFrame类型"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:25:06.308235300Z",
     "start_time": "2024-07-05T08:25:06.266673600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a   -1.191574\n",
      "b    0.025920\n",
      "c   -0.884330\n",
      "d   -0.427654\n",
      "Name: 1, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(df_obj.loc[1,:]) # 返回Series类型"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:27:40.723306300Z",
     "start_time": "2024-07-05T08:27:40.715618600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "1. loc 标签索引(通过索引标签值获取数据)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# 标签索引 loc，建议使用loc，效率更高\n",
    "# Series\n",
    "print(ser_obj)\n",
    "print(ser_obj.loc['b':'d']) #前闭后闭\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:28:39.264854800Z",
     "start_time": "2024-07-05T08:28:39.255852900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "a -1.038192  0.050170 -1.780966  1.216981\n",
      "b  0.630710  0.295578 -0.405971  0.746307\n",
      "c -0.525507 -1.163135 -0.381273 -1.369475\n",
      "d -0.888352 -0.490646 -0.645558 -2.332155\n",
      "e  0.117791 -2.561130  0.310166 -0.270106\n",
      "--------------------------------------------------\n",
      "a   -1.038192\n",
      "b    0.050170\n",
      "c   -1.780966\n",
      "d    1.216981\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n",
      "          b         c         d\n",
      "a  0.050170 -1.780966  1.216981\n",
      "b  0.295578 -0.405971  0.746307\n",
      "c -1.163135 -0.381273 -1.369475\n",
      "          b         d\n",
      "a  0.050170  1.216981\n",
      "c -1.163135 -1.369475\n",
      "          b\n",
      "c -1.163135\n",
      "-1.1631351768422722\n"
     ]
    }
   ],
   "source": [
    "# DataFrame\n",
    "df_obj = pd.DataFrame(np.random.randn(5,4),\n",
    "                      columns = list('abcd'),\n",
    "                      index=list('abcde'))\n",
    "print(df_obj)\n",
    "print('-'*50)\n",
    "print(df_obj.loc['a'])  #拿的是行\n",
    "print('-'*50)\n",
    "# 第一个参数索引行，第二个参数是列,loc或者iloc效率高于直接用取下标的方式，前闭后闭\n",
    "print(df_obj.loc['a':'c', 'b':'d']) #连续索引\n",
    "print(df_obj.loc[['a','c'], ['b','d']]) #不连续索引\n",
    "print(df_obj.loc[['c'],['b']]) #取一个值,返回的是DataFrame类型\n",
    "print(df_obj.loc['c','b'])  #取一个值"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:29:13.147859Z",
     "start_time": "2024-07-05T08:29:13.131017600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "<class 'numpy.float64'>\n"
     ]
    }
   ],
   "source": [
    "print(type(df_obj.loc[['c'],['b']])) #取一个值,返回的是DataFrame类型\n",
    "print(type(df_obj.loc['c','b']))  #取一个值"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:32:12.910485400Z",
     "start_time": "2024-07-05T08:32:12.902234300Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## iloc 位置索引(推荐使用)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "data": {
      "text/plain": "a    0\nb    1\nc    2\nd    3\ne    4\ndtype: int64"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser_obj"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T03:06:36.606086100Z",
     "start_time": "2024-04-11T03:06:36.497146500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "outputs": [
    {
     "data": {
      "text/plain": "          a         b         c         d\na -1.038192  0.050170 -1.780966  1.216981\nb  0.630710  0.295578 -0.405971  0.746307\nc -0.525507 -1.163135 -0.381273 -1.369475\nd -0.888352 -0.490646 -0.645558 -2.332155\ne  0.117791 -2.561130  0.310166 -0.270106",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>a</th>\n      <th>b</th>\n      <th>c</th>\n      <th>d</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>-1.038192</td>\n      <td>0.050170</td>\n      <td>-1.780966</td>\n      <td>1.216981</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>0.630710</td>\n      <td>0.295578</td>\n      <td>-0.405971</td>\n      <td>0.746307</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>-0.525507</td>\n      <td>-1.163135</td>\n      <td>-0.381273</td>\n      <td>-1.369475</td>\n    </tr>\n    <tr>\n      <th>d</th>\n      <td>-0.888352</td>\n      <td>-0.490646</td>\n      <td>-0.645558</td>\n      <td>-2.332155</td>\n    </tr>\n    <tr>\n      <th>e</th>\n      <td>0.117791</td>\n      <td>-2.561130</td>\n      <td>0.310166</td>\n      <td>-0.270106</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_obj"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:34:22.160089700Z",
     "start_time": "2024-07-05T08:34:22.144296700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Series\n",
    "print(ser_obj.iloc[1:3])\n",
    "print(ser_obj.iloc[1:3]) # 前闭后开[)，效率高\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:34:46.992803400Z",
     "start_time": "2024-07-05T08:34:46.980938700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "a -1.038192  0.050170 -1.780966  1.216981\n",
      "b  0.630710  0.295578 -0.405971  0.746307\n",
      "c -0.525507 -1.163135 -0.381273 -1.369475\n",
      "d -0.888352 -0.490646 -0.645558 -2.332155\n",
      "e  0.117791 -2.561130  0.310166 -0.270106\n",
      "          a         b\n",
      "a -1.038192  0.050170\n",
      "b  0.630710  0.295578\n",
      "          a         c\n",
      "a -1.038192 -1.780966\n",
      "c -0.525507 -0.381273\n",
      "-1.0381923449063826\n"
     ]
    }
   ],
   "source": [
    "# DataFrame，iloc是前闭后开[)\n",
    "print(df_obj)\n",
    "print(df_obj.iloc[0:2, 0:2]) # 注意和df_obj.loc[0:2, 'a']的区别\n",
    "print(df_obj.iloc[[0,2], [0,2]]) # 不连续索引\n",
    "print(df_obj.iloc[0,0]) # 取一个值"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:34:50.319622600Z",
     "start_time": "2024-07-05T08:34:50.307630200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0  0.155785  0.408564 -0.514448 -1.756173\n",
      "1  0.613787 -0.927472  0.371214  0.072892\n",
      "2  0.840338 -0.601645 -0.559734  0.647662\n",
      "3  0.197721  1.889629 -1.248242 -0.204272\n",
      "4  0.780178 -1.088452 -0.054849 -0.116839\n",
      "--------------------------------------------------\n",
      "          0         1         2         3\n",
      "0  0.155785  0.408564 -0.514448 -1.756173\n",
      "1  0.613787 -0.927472  0.371214  0.072892\n",
      "--------------------------------------------------\n",
      "          0         1         2         3\n",
      "0  0.155785  0.408564 -0.514448 -1.756173\n",
      "1  0.613787 -0.927472  0.371214  0.072892\n",
      "2  0.840338 -0.601645 -0.559734  0.647662\n"
     ]
    }
   ],
   "source": [
    "#没有设置行和列索引的DataFrame，iloc和loc的区别\n",
    "df_obj2 = pd.DataFrame(np.random.randn(5,4))\n",
    "print(df_obj2)\n",
    "print('-'*50)\n",
    "print(df_obj2.iloc[0:2]) #左闭右开\n",
    "print('-'*50)\n",
    "print(df_obj2.loc[0:2]) #左闭右闭"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:35:29.963603900Z",
     "start_time": "2024-07-05T08:35:29.959246700Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 5.对齐运算"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 3 4 5 6]\n"
     ]
    }
   ],
   "source": [
    "a=np.array([1,2,3,4,5])\n",
    "b=np.array([1])\n",
    "print(a+b)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:38:08.845891800Z",
     "start_time": "2024-07-05T08:38:08.833061100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    10\n",
      "1    11\n",
      "2    12\n",
      "3    13\n",
      "4    14\n",
      "5    15\n",
      "6    16\n",
      "7    17\n",
      "8    18\n",
      "9    19\n",
      "dtype: int64\n",
      "0    20\n",
      "1    21\n",
      "2    22\n",
      "3    23\n",
      "4    24\n",
      "dtype: int64\n",
      "s1+s2: \n",
      "0    30.0\n",
      "1    32.0\n",
      "2    34.0\n",
      "3    36.0\n",
      "4    38.0\n",
      "5     NaN\n",
      "6     NaN\n",
      "7     NaN\n",
      "8     NaN\n",
      "9     NaN\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "s1 = pd.Series(range(10, 20), index = range(10))\n",
    "s2 = pd.Series(range(20, 25), index = range(5))\n",
    "print(s1)\n",
    "print(s2)\n",
    "# Series 对齐运算\n",
    "print('s1+s2: ')\n",
    "s3=s1+s2\n",
    "print(s3)  #缺失数据默认是NaN  np.nan"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:39:13.039176800Z",
     "start_time": "2024-07-05T08:39:13.032089200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "--------------------------------------------------\n",
      "0    30.0\n",
      "1    32.0\n",
      "2    34.0\n",
      "3    36.0\n",
      "4    38.0\n",
      "5    15.0\n",
      "6    16.0\n",
      "7    17.0\n",
      "8    18.0\n",
      "9    19.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(np.isnan(s3[6]))\n",
    "print('-'*50)\n",
    "print(s2.add(s1, fill_value = 0))  #未对齐的数据将和填充值做运算\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:40:25.222777500Z",
     "start_time": "2024-07-05T08:40:25.209486500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    10.0\n",
      "1    10.0\n",
      "2    10.0\n",
      "3    10.0\n",
      "4    10.0\n",
      "5   -15.0\n",
      "6   -16.0\n",
      "7   -17.0\n",
      "8   -18.0\n",
      "9   -19.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(s2.sub(s1, fill_value = 0))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:40:52.438521500Z",
     "start_time": "2024-07-05T08:40:52.387553Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     a    b\n",
      "0  1.0  1.0\n",
      "1  1.0  1.0\n",
      "     a    b    c\n",
      "0  1.0  1.0  1.0\n",
      "1  1.0  1.0  1.0\n",
      "2  1.0  1.0  1.0\n",
      "--------------------------------------------------\n",
      "a    float64\n",
      "b    float64\n",
      "c    float64\n",
      "dtype: object\n",
      "     a    b   c\n",
      "0  0.0  0.0 NaN\n",
      "1  0.0  0.0 NaN\n",
      "2  NaN  NaN NaN\n"
     ]
    }
   ],
   "source": [
    "#df的对齐运算\n",
    "import numpy as np\n",
    "df1 = pd.DataFrame(np.ones((2,2)), columns = ['a', 'b'])\n",
    "df2 = pd.DataFrame(np.ones((3,3)), columns = ['a', 'b', 'c'])\n",
    "print(df1)\n",
    "print(df2)\n",
    "print('-'*50)\n",
    "print(df2.dtypes)\n",
    "print(df1-df2)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:42:59.716815800Z",
     "start_time": "2024-07-05T08:42:59.691908100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     a    b    c\n",
      "0  0.0  0.0 -1.0\n",
      "1  0.0  0.0 -1.0\n",
      "2 -1.0 -1.0 -1.0\n"
     ]
    }
   ],
   "source": [
    "print(df2.sub(df1, fill_value = 2)) #未对齐的数据将和填充值做运算"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:43:23.137878400Z",
     "start_time": "2024-07-05T08:43:23.128874300Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
